First, check package dependencies and install ProjecTILs

Sys.setenv(R_REMOTES_NO_ERRORS_FROM_WARNINGS="true")

remotes::install_github("carmonalab/UCell", ref="v1.3")
remotes::install_github("carmonalab/scGate")
remotes::install_github("carmonalab/ProjecTILs")

library(ProjecTILs)
library(Seurat)

Load reference atlas and query data

First, load the default reference TIL atlas. If no reference map file is provided, the function load.reference.map() will automatically download it from https://doi.org/10.6084/m9.figshare.12478571

ref <- load.reference.map()
## [1] "Loading Default Reference Atlas..."
## [1] "/Users/mass/Documents/Projects/Cell_clustering/ProjecTILs.demo/ref_TILAtlas_mouse_v1.rds"
## [1] "Loaded Reference map ref_TILAtlas_mouse_v1"

Let’s explore the reference atlas

refCols <- c("#edbe2a", "#A58AFF", "#53B400", "#F8766D", "#00B6EB", "#d1cfcc", "#FF0000", "#87f6a5", "#e812dd")
DimPlot(ref,label = T, cols = refCols)

See expression of important marker genes across reference subtypes

markers <- c("Cd4","Cd8a","Ccr7","Tcf7","Pdcd1","Havcr2","Tox","Izumo1r","Cxcr6","Xcl1","Gzmb","Gzmk","Ifng","Foxp3")
VlnPlot(ref,features=markers,stack = T,flip = T,assay = "RNA")

Now let’s load a query dataset - Miller et al., Nature Immunol (2019)

#A sample data set is provided with the ProjecTILs package
querydata <- ProjecTILs::query_example_seurat

More generally, it is possible to load a query matrix with gene names and barcodes (e.g. 10X format or raw counts)

##Raw count matrix from GEO
library(GEOquery)
geo_acc <- "GSE86028"
getGEOSuppFiles(geo_acc)

fname3 <- sprintf("%s/GSE86028_TILs_sc_wt_mtko.tpm.log2.txt.gz", geo_acc)
querydata3 <- read.sc.query(fname3, type = "raw.log2")

Run Projection algorithm

query.projected <- make.projection(querydata, ref=ref)
## [1] "Using assay RNA for query"
## [1] "37 out of 1501 ( 2% ) non-pure cells removed. Use filter.cells=FALSE to avoid pre-filtering"
## [1] "Aligning query to reference map for batch-correction..."
## 
## Projecting corrected query onto Reference PCA space
## 
## Projecting corrected query onto Reference UMAP space

NB: by default, make.projection() will pre-filter T cells using scGate. In case the input dataset is already pre-filtered, or if you are using a non-T cell reference atlas, you can disable this step using make.projection(querydata, ref=ref, filter.cells = FALSE).

Plot projection of new data over the reference in UMAP space. The contour lines display the density of projected query cells onto the reference map.

plot.projection(ref, query.projected)

Predict cell states

Predict the cell states in the query set using a nearest-neighbor algorithm

query.projected <- cellstate.predict(ref=ref, query=query.projected)
table(query.projected$functional.cluster)
## 
## CD8_EffectorMemory      CD8_NaiveLike            CD8_Tex           CD8_Tpex 
##                 61                 24               1308                 58 
##                Th1               Treg 
##                  1                 12

Plot the predicted composition of the query in terms of reference T cell subtypes

plot.statepred.composition(ref, query.projected,metric = "Percent")

How do the gene expression levels compare between reference and query for the different cell states?

plot.states.radar(ref, query=query.projected, min.cells=30)

Compare states across conditions

If we have multiple conditions (e.g. control vs. treatment, or samples from different tissues), we can search for discriminant genes between conditions (otherwise, by default this analysis is performed against the reference subtype as the ‘control’)

#Simulate a condition which e.g. increases Gzmb expression compared to control
query.control <- subset(query.projected, subset=`Gzmb` < 1.5)
query.perturb <- subset(query.projected, subset=`Gzmb` >= 1.5)

plot.states.radar(ref, query=list("Control" = query.control, "Query" = query.perturb))

In this toy example, where we simulated a condition that increases Gzmb expression compared to control, we expect cytotoxicity genes to drive differences.

discriminantGenes <- find.discriminant.genes(ref=ref, query=query.perturb, query.control=query.control, state="CD8_Tex")
head(discriminantGenes,n=10)
##                  p_val avg_log2FC pct.1 pct.2    p_val_adj
## Gzmb      3.842405e-94  2.8257408 1.000 0.848 1.050552e-89
## Gzmc      2.615333e-35  2.8814254 0.809 0.378 7.150581e-31
## Lgals1    5.268875e-24  0.4543825 1.000 1.000 1.440563e-19
## AA467197  2.264136e-22  1.1983647 0.845 0.530 6.190373e-18
## Mt1       3.582293e-19  1.1840434 0.776 0.457 9.794346e-15
## Ccl9      1.921541e-18  1.6471873 0.489 0.116 5.253686e-14
## Serpinb6b 3.538035e-16  0.8050444 0.858 0.598 9.673341e-12
## Mt2       1.750597e-14  0.9658675 0.613 0.274 4.786308e-10
## Gzme      4.430793e-14  2.0820514 0.462 0.165 1.211423e-09
## Prf1      5.926832e-14  0.5883041 0.503 0.177 1.620455e-09

We can use a volcano plot to display differentially expressed genes:

library(EnhancedVolcano)
## Loading required package: ggrepel
## Registered S3 methods overwritten by 'ggalt':
##   method                  from   
##   grid.draw.absoluteGrob  ggplot2
##   grobHeight.absoluteGrob ggplot2
##   grobWidth.absoluteGrob  ggplot2
##   grobX.absoluteGrob      ggplot2
##   grobY.absoluteGrob      ggplot2
EnhancedVolcano(discriminantGenes, lab = rownames(discriminantGenes), x = "avg_log2FC", y = "p_val", pCutoff = 1e-09, 
    FCcutoff = 0.5, labSize = 5, legendPosition = "none", drawConnectors = F, title = "Gzmb_high vs. Gzmb_low (Tex)")
## Warning: Ignoring unknown parameters: xlim, ylim

Using a random subsetting, p-values should not be significant:

rand.list <- ProjecTILs:::randomSplit(query.projected, n=2, seed=1)
discriminantGenes <- find.discriminant.genes(ref=ref, query=rand.list[[1]], query.control=rand.list[[2]], state="CD8_Tex")
EnhancedVolcano(discriminantGenes, lab = rownames(discriminantGenes), x = "avg_log2FC", y = "p_val", pCutoff = 1e-09, 
    FCcutoff = 0.5, labSize = 5, legendPosition = "none", drawConnectors = F, title = "Random split (Tex)")

Find discriminant dimensions

The dimensions in UMAP space summarize the main axes of variability of the reference map. What if the query data contains novel states? We can search for additional, maximally discriminant dimensions (either in ICA or PCA space) that explain new variability in the query set.

As before, simulate a condition which increases Gzmb expression compared to control

#
query.control <- subset(query.projected, subset=`Gzmb` < 1.5)
query.perturb <- subset(query.projected, subset=`Gzmb` >= 1.5)

plot.states.radar(ref, query=list("Control" = query.control, "Query" = query.perturb))

In this toy example, we expect some gene module associated with granzymes to drive the discriminant analysis:

top.ica.wcontrol <- find.discriminant.dimensions(ref=ref, query=query.perturb, query.control=query.control)
head(top.ica.wcontrol)
##              stat  stat_abs        p_val
## ICA_26  0.4800657 0.4800657 0.000000e+00
## ICA_24 -0.3245565 0.3245565 7.521761e-12
## ICA_19  0.2854554 0.2854554 7.046419e-09
## ICA_42  0.2608306 0.2608306 3.341548e-07
## ICA_6   0.2407044 0.2407044 6.047000e-06
## ICA_28 -0.2129243 0.2129243 2.246895e-04
VizDimLoadings(ref, reduction = "ica", nfeatures = 10, dims=c(26,24,42), ncol=3)

Now we can plot the ICA dimension that captured the genetic changes associated to the perturbation of increasing Gzmb

plot3d <- plot.discriminant.3d(ref, query=query.perturb, query.control=query.control, extra.dim="ICA_26")
plot3d

We can plot other metadata in the z-axis of the UMAP, e.g. the cycling score calculated by the TILPRED cycling signature

plot3d <- plot.discriminant.3d(ref, query.projected, extra.dim="cycling.score")
plot3d

Focus the plot only on a specific state

plot3d <- plot.discriminant.3d(ref, query.projected, extra.dim="cycling.score", query.state="CD8_Tex")
plot3d

Using a random subsetting, p-values should not be significant:

rand.list <- ProjecTILs:::randomSplit(query.projected, n=2, seed=1)
top.ica.ks.rand <- find.discriminant.dimensions(ref=ref, query=rand.list[[1]], query.control=rand.list[[2]], reduction="ica")
top.ica.ttest.rand <- find.discriminant.dimensions(ref=ref, query=rand.list[[1]], query.control=rand.list[[2]], reduction="ica", test = "t-test")

Further information

ProjecTILs repository

ProjecTILs case studies - INDEX - Repository

Publication: Andreatta et al Nat. Comm. 2021


  1. ↩︎

  2. ↩︎